This report exists to visually compare the bias-corrected CMIP6 data against the real-world observation data sources used to bias-correct them.
Bias corrected timelines for the sea surface temperature, sea surface salinity, bottom temperature, and bottom salinity will be compared against the reference observations (OISSTv2, SODA) to check how accurately they represent observed conditions, and whether that changes between our areas of interest.
Both the bias-corrections and the observational datasets (OISSTv2, SODA) have been prepared separately and are loaded below.
For clarity on what data is included/attributed to each region, the areas we focused on have been plotted below.
# Getting Timeseries and Shapefile Locations via gmRi
# returns path to oisst timeseries & path to shapefile:
region_groups <- c("nelme_regions", "gmri_sst_focal_areas", "lme", "nmfs_trawl_regions")
region_lookup <- map(region_groups, function(region_group){
mask_details <- get_timeseries_paths(region_group) }) %>%
setNames(region_groups)
# Grab a couple to use from each as contenders
# NMFS Trawl Regions
trawl_gb <- read_sf(region_lookup$nmfs_trawl_regions$georges_bank$shape_path)
trawl_gom <- read_sf(region_lookup$nmfs_trawl_regions$gulf_of_maine$shape_path)
trawl_mab <- read_sf(region_lookup$nmfs_trawl_regions$mid_atlantic_bight$shape_path)
trawl_sne <- read_sf(region_lookup$nmfs_trawl_regions$southern_new_england$shape_path)
# Load all the strata and just filter out the crap ones
trawl_full <- read_sf(str_c(res_path, "Shapefiles/BottomTrawlStrata/BTS_Strata.shp")) %>%
clean_names() %>%
filter(strata >= 01010 ,
strata <= 01760,
strata != 1310,
strata != 1320,
strata != 1330,
strata != 1350,
strata != 1410,
strata != 1420,
strata != 1490)
# DFO Data
dfo_path <- shared.path(group = "Mills Lab", folder = "Projects/DFO_survey_data/strata_shapefiles")
dfo_area <- read_sf(str_c(dfo_path, "MaritimesRegionEcosystemAssessmentBoundary.shp"))
# set overall zoom for maps
xlimz <- c(-76, -57)
ylimz <- c(35, 48)
# base map
base_map <- ggplot() +
geom_sf(data = new_england) +
geom_sf(data = canada) +
geom_sf(data = greenland) +
theme(legend.position = "bottom") +
guides(color = guide_legend(title.position = "top", title.hjust = 0.5)) +
labs(color = "",
fill = "")
# Build off of base map
base_map +
geom_sf(data = st_union(trawl_full), aes(fill = "Trawl Survey Area"), alpha = 0.2) +
geom_sf(data = st_union(trawl_gom), aes(fill = "Gulf of Maine Survey Area"), alpha = 0.2) +
coord_sf(xlim = xlimz, ylim = ylimz) +
labs(title = "NMFS Trawl Region")
# Map dfo data
base_map +
geom_sf(data = dfo_area, aes(fill = "DFO Ecosystems Assesment Boundary"), alpha = 0.2) +
coord_sf(xlim = xlimz, ylim = ylimz) +
labs(title = "DFO Region")
base_map +
geom_sf(data = st_union(trawl_full), aes(fill = "Trawl Survey Area"), alpha = 0.2) +
geom_sf(data = st_union(trawl_gom), aes(fill = "Gulf of Maine Survey Area"), alpha = 0.2) +
geom_sf(data = dfo_area, aes(fill = "DFO Ecosystems Assesment Boundary"), alpha = 0.2) +
coord_sf(xlim = xlimz, ylim = ylimz) +
labs(title = "All Areas")
# SODA vars
soda_ssal <- stack(str_c(soda_path, "SODA_Salt_Red.nc"))
soda_bsal <- stack(str_c(soda_path, "SODA_Salt_Red_bottomLayer.nc"))
soda_btemp <- stack(str_c(soda_path, "SODA_Temp_Red_bottomLayer.nc"))
# OISST
data_window <- data.frame(lon = c(-72, -61),
lat = c(39, 46.1),
time = as.Date(c("1981-09-01", "2020-12-31")))
# Load Data
oisst <- oisst_window_load(data_window = data_window,
anomalies = FALSE)
# Convert to Monthly
oisst <- stack(oisst)
# build a key for months using the layer names
month_key <- seq.Date(min(data_window$time), max(data_window$time), by = "month")
month_key <- str_c("X", month_key) %>% str_replace_all("-", ".")
month_key <- str_sub(month_key, 1, -4)
# Crop them
oisst_monthly <- map(month_key, function(wut_month){
# Identify corresponding days in month
which_layers <- str_detect(names(oisst), wut_month)
layer_indices <- which(which_layers)
if(length(layer_indices) == 0) { print(str_c(length(layer_indices), " days for ", wut_month)) }
# Get mean
month_mean <- calc(oisst[[layer_indices]], fun = mean, na.rm = T)
# set month as name
month_mean <- setNames(month_mean, wut_month)
return(month_mean)
})
# Stack
oisst_monthly <- stack(oisst_monthly)
# clean up
rm(oisst)
Cropping Functions
#### Processing Functions
# Masking function for an area
mask_shape <- function(in_ras, in_mask){
r1 <- crop(x = in_ras, y = in_mask)
r2 <- mask(x = r1, mask = in_mask)
return(r2)}
# Function to turn it into a dataframe using cellstats
timeseries_from_mask <- function(ras_in, var_name){
ts <- cellStats(ras_in, mean, na.rm = T)
ts <- ts %>%
as.data.frame() %>%
rownames_to_column() %>%
setNames(c("date", var_name)) %>%
mutate(date = str_remove(date, "X"),
date = str_replace_all(date, "[.]", "-"))
date_len <- str_length(ts$date[1])
if(date_len == 7){
ts <- ts %>%
mutate(date = str_c(date, "-15"),
date = as.Date(date))
} else{
ts <- mutate(ts, date = as.Date(date))
}
return(ts)
}
Processing Observed Variables
# Put observed variables in a list
observed_vars <- list(
bot_temp = soda_btemp,
bot_sal = soda_bsal,
surf_sal = soda_ssal,
surf_temp = oisst_monthly)
# Now mask them all and get timeseries
masked_vars <- imap(observed_vars, function(masking_var, var_name){
# Mask and get timeseries
masked_gom <- mask_shape(masking_var, trawl_gom)
masked_gom <- timeseries_from_mask(masked_gom, var_name)
masked_trawl <- mask_shape(masking_var, trawl_full)
masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
masked_dfo <- mask_shape(masking_var, dfo_area)
masked_dfo <- timeseries_from_mask(masked_dfo, var_name)
# Put in list
list(
nefsc_gom = masked_gom,
nefsc_full = masked_trawl,
dfo_full = masked_dfo)
})
# Get Time Periods to SODA/OISST
soda_years <- unique(str_sub(names(soda_ssal), 2, 5))
oisst_years <- unique(str_sub(names(oisst_monthly), 2, 5))
# clean up
rm(soda_bsal, soda_btemp, soda_ssal, oisst_monthly)
# CMIP Bias Corrected
cmip_stemp <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/surf_temp/surf_temp_OISST_bias_corrected_mean.grd"))
cmip_btemp <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/bot_temp/bot_temp_SODA_bias_corrected_mean.grd"))
cmip_ssal <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/surf_sal/surf_sal_SODA_bias_corrected_mean.grd"))
cmip_bsal <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/bot_sal/bot_sal_SODA_bias_corrected_mean.grd"))
# Cmip matches to SODA/OISST Time Periods
soda_index <- which(str_sub(names(cmip_bsal), 2, 5) %in% soda_years)
oisst_index <- which(str_sub(names(cmip_stemp), 2, 5) %in% oisst_years)
# Put them in a list
cmip_vars <- list(
bot_temp = cmip_btemp,
bot_sal = cmip_bsal,
surf_sal = cmip_ssal,
surf_temp = cmip_stemp)
# Rotate them:
cmip_vars <- map(cmip_vars, raster::rotate)
# Now mask them all and get timeseries
masked_cmip <- imap(cmip_vars, function(masking_var, var_name){
# Mask and get timeseries
masked_gom <- mask_shape(masking_var, trawl_gom)
masked_gom <- timeseries_from_mask(masked_gom, var_name)
masked_trawl <- mask_shape(masking_var, trawl_full)
masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
masked_dfo <- mask_shape(masking_var, dfo_area)
masked_dfo <- timeseries_from_mask(masked_dfo, var_name)
# put in list
list(
nefsc_gom = masked_gom,
nefsc_full = masked_trawl,
dfo_full = masked_dfo)
})
# CMIP Bias Corrected
cmip_stemp_5 <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/surf_temp/surf_temp_OISST_bias_corrected_5thpercentile.grd"))
cmip_btemp_5 <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/bot_temp/bot_temp_SODA_bias_corrected_5thpercentile.grd"))
cmip_ssal_5 <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/surf_sal/surf_sal_SODA_bias_corrected_5thpercentile.grd"))
cmip_bsal_5 <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/bot_sal/bot_sal_SODA_bias_corrected_5thpercentile.grd"))
# Put them in a list
cmip_vars_5 <- list(
bot_temp = cmip_btemp_5,#[[soda_index]],
bot_sal = cmip_bsal_5,#[[soda_index]],
surf_sal = cmip_ssal_5,#[[soda_index]],
surf_temp = cmip_stemp_5#[[oisst_index]]
)
# Rotate them:
cmip_vars_5 <- map(cmip_vars_5, raster::rotate)
# Now mask them all and get timeseries
masked_cmip_5 <- imap(cmip_vars_5, function(masking_var, var_name){
# Mask and get timeseries
masked_gom <- mask_shape(masking_var, trawl_gom)
masked_gom <- timeseries_from_mask(masked_gom, var_name)
masked_trawl <- mask_shape(masking_var, trawl_full)
masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
masked_dfo <- mask_shape(masking_var, dfo_area)
masked_dfo <- timeseries_from_mask(masked_dfo, var_name)
# put in list
list(
nefsc_gom = masked_gom,
nefsc_full = masked_trawl,
dfo_full = masked_dfo
)
})
# CMIP Bias Corrected
cmip_stemp_95 <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/surf_temp/surf_temp_OISST_bias_corrected_95thpercentile.grd"))
cmip_btemp_95 <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/bot_temp/bot_temp_SODA_bias_corrected_95thpercentile.grd"))
cmip_ssal_95 <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/surf_sal/surf_sal_SODA_bias_corrected_95thpercentile.grd"))
cmip_bsal_95 <- stack(str_c(cmip_path, experiment, "/BiasCorrected/EnsembleData/bot_sal/bot_sal_SODA_bias_corrected_95thpercentile.grd"))
# Put them in a list
cmip_vars_95 <- list(
bot_temp = cmip_btemp_95,#[[soda_index]],
bot_sal = cmip_bsal_95,#[[soda_index]],
surf_sal = cmip_ssal_95,#[[soda_index]],
surf_temp = cmip_stemp_95#[[oisst_index]]
)
# Rotate them:
cmip_vars_95 <- map(cmip_vars_95, raster::rotate)
# Now mask them all and get timeseries
masked_cmip_95 <- imap(cmip_vars_95, function(masking_var, var_name){
# Mask and get timeseries
masked_gom <- mask_shape(masking_var, trawl_gom)
masked_gom <- timeseries_from_mask(masked_gom, var_name)
masked_trawl <- mask_shape(masking_var, trawl_full)
masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
masked_dfo <- mask_shape(masking_var, dfo_area)
masked_dfo <- timeseries_from_mask(masked_dfo, var_name)
# put in list
list(
nefsc_gom = masked_gom,
nefsc_full = masked_trawl,
dfo_full = masked_dfo
)
})
The goal of the bias correction methods is to (as the name implies) remove bias from the CMIP6 model runs, through comparison to real-world observations or reanalysis data sets that better reflect real-world data.
The bias-corrected data should mirror the intra-annual variations seen in the datasets used to bias-correct them, and the values should be close, and without an obvious bias.
The following plots showcase how the observational datasets fall in-line with the bias corrected means, 5th percentile, and 95th percentile data. With emphasis on intra-annual variations.
comparison_plot <- function(var_option, mask_option){
obs_data <- masked_vars[[var_option]][[mask_option]]
cmip_mean <- masked_cmip[[var_option]][[mask_option]]
cmip_5 <- masked_cmip_5[[var_option]][[mask_option]]
cmip_95 <- masked_cmip_95[[var_option]][[mask_option]]
# Add data source
cmip_mean$data_source <- "CMIP Mean"
cmip_5$data_source <- "CMIP 5th Perc."
cmip_95$data_source <- "CMIP 95th Perc."
# Separate action for labeling oisst/soda
if(var_option == "surf_temp"){
obs_data$data_source <- "OISSTv2"
} else{
obs_data$data_source <- "SODA"
}
# Filter Dates to focus on shared timeperiod
cmip_mean <- filter(cmip_mean, between(date, min(obs_data$date), max(obs_data$date)))
cmip_5 <- filter(cmip_5, between(date, min(obs_data$date), max(obs_data$date)))
cmip_95 <- filter(cmip_95, between(date, min(obs_data$date), max(obs_data$date)))
# format variable for display on axis
var_label <- str_replace(var_option, "_", " ")
var_label <- str_to_title(var_label)
ggplot() +
geom_line(data = obs_data, aes_string("date", var_option, color = "data_source")) +
geom_line(data = cmip_mean, aes_string("date", var_option, color = "data_source")) +
geom_line(data = cmip_5, aes_string("date", var_option, color = "data_source")) +
geom_line(data = cmip_95, aes_string("date", var_option, color = "data_source")) +
scale_color_gmri() +
labs(x = "", y = var_label, color = "") +
facet_zoom(xlim =c(as.Date("2000-01-01"), as.Date("2001-12-31")), zoom.size = .5) +
theme(legend.position = "bottom")
}
var_option <- "bot_temp"
mask_option <- "nefsc_gom"
comparison_plot(var_option = var_option, mask_option = mask_option)
mask_option <- "nefsc_full"
comparison_plot(var_option = var_option, mask_option = mask_option)
mask_option <- "dfo_full"
comparison_plot(var_option = var_option, mask_option = mask_option)
var_option = "surf_temp"
mask_option <- "nefsc_gom"
comparison_plot(var_option = var_option, mask_option = mask_option)
mask_option <- "nefsc_full"
comparison_plot(var_option = var_option, mask_option = mask_option)
mask_option <- "dfo_full"
comparison_plot(var_option = var_option, mask_option = mask_option)
var_option <- "bot_sal"
mask_option <- "nefsc_gom"
comparison_plot(var_option = var_option, mask_option = mask_option)
mask_option <- "nefsc_full"
comparison_plot(var_option = var_option, mask_option = mask_option)
mask_option <- "dfo_full"
comparison_plot(var_option = var_option, mask_option = mask_option)
var_option <- "surf_sal"
mask_option <- "nefsc_gom"
comparison_plot(var_option = var_option, mask_option = mask_option)
mask_option <- "nefsc_full"
comparison_plot(var_option = var_option, mask_option = mask_option)
mask_option <- "dfo_full"
comparison_plot(var_option = var_option, mask_option = mask_option)
The following plots showcase how the observational datasets fall in-line with the bias corrected means, 5th percentile, and 95th percentile data sets.
# Function to Plot Single Variable Trajectories for the Different Regions
clim_futures <- function(var_option){
# Subset the variables
obs_data <- masked_vars[[var_option]]
cmip_mean <- masked_cmip[[var_option]]
cmip_5 <- masked_cmip_5[[var_option]]
cmip_95 <- masked_cmip_95[[var_option]]
# Label the sources for the CMIP Data, process areas
obs_data <- map_dfr(obs_data, ~ mutate(.x, data_source = "Reference Data"), .id = "area")
cmip_mean <- map_dfr(cmip_mean, ~ mutate(.x, data_source = "CMIP Mean"), .id = "area")
cmip_5 <- map_dfr(cmip_5, ~ mutate(.x, data_source = "CMIP 5th Perc."), .id = "area")
cmip_95 <- map_dfr(cmip_95, ~ mutate(.x, data_source = "CMIP 95th Perc."), .id = "area")
# combine the different quantiles
cmip_all <- bind_rows(list(obs_data, cmip_mean, cmip_5, cmip_95)) %>%
mutate(data_source = factor(data_source,
levels = c("Reference Data", "CMIP 5th Perc.",
"CMIP Mean", "CMIP 95th Perc.")),
area = str_replace_all(area, "_", " "),
area = str_to_title(area),
area = str_replace(area, "Dfo", "DFO"),
area = str_replace(area, "Nefsc", "NEFSC"),
area = factor(area, levels = c("DFO Full", "NEFSC Gom", "NEFSC Full")),
yr = lubridate::year(date),
month = lubridate::month(date))
#group on year and get annual averayges
var_sym <- sym(var_option)
cmip_all <- cmip_all %>%
group_by(yr, area, data_source) %>%
summarise({{ var_sym }} := mean({{ var_sym }}, na.rm = T),
.groups = "keep") %>%
ungroup() %>%
mutate(yr = as.numeric(as.character(yr))) %>%
filter(yr >= 1982)
# format variable for display on axis
var_titles <- c(
"bot_temp" = expression("Bottom Temperature"~degree~"C"),
"surf_temp" = expression("Surface Temperature"~degree~"C"),
"bot_sal" = "Bottom Salinity",
"surf_sal" = "Surface Salinity")
var_label <- var_titles[var_option]
#pivot the sources wider so we can single them out asier
data_wide <- cmip_all %>%
pivot_wider(names_from = data_source, values_from = {{var_option}})
# Build plot
regional_comparison_plot <- ggplot(data_wide, aes(x = yr)) +
geom_ribbon(aes(ymin = `CMIP 5th Perc.`,
ymax = `CMIP 95th Perc.`),
fill = "gray80") +
geom_line(aes(y = `CMIP Mean`,
color = "CMIP Mean")) +
geom_line(aes(y = `Reference Data`, color = "Reference Data")) +
geom_point(aes(y = `Reference Data`, color = "Reference Data"),
size = 0.25) +
scale_color_manual(values = c(
"Reference Data" = "gray10",
"CMIP Mean" = "gray50")) +
facet_wrap(~area, nrow = 3) +
labs(x = "", y = var_label, color = "") +
theme(legend.position = "bottom")
return(regional_comparison_plot)
}
clim_futures(var_option = "bot_temp")
clim_futures(var_option = "surf_temp")
clim_futures(var_option = "bot_sal")
clim_futures(var_option = "surf_sal")